The new peak calling round applied on the previous notebook significantly increased the number of the features identified in our dataset. Therefore, we must need to repeat the standard downstream analysis, including data normalization, dimensionality reduction analysis and batch correction to account for this change in the number of features detected.
library(Seurat)
library(SeuratWrappers)
library(Signac)
library(harmony)
library(tidyverse)
set.seed(1234)
cell_type = "Cytotoxic"
color_palette <- c("#1CFFCE", "#90AD1C", "#C075A6",
"#85660D", "#5A5156", "#AA0DFE",
"#F8A19F", "#F7E1A0", "#1C8356",
"#FEAF16", "#822E1C", "#C4451C",
"#1CBE4F", "#325A9B", "#F6222E",
"#FE00FA", "#FBE426", "#16FF32",
"black", "#3283FE", "#B00068",
"#DEA0FD", "#B10DA1", "#E4E1E3",
"#90AD1C", "#FE00FA", "#85660D",
"#3B00FB", "#822E1C", "coral2",
"#1CFFCE", "#1CBE4F", "#3283FE",
"#FBE426", "#F7E1A0", "#325A9B",
"#2ED9FF", "#B5EFB5", "#5A5156",
"#DEA0FD", "#FEAF16", "#683B79",
"#B10DA1", "#1C7F93", "#F8A19F",
"dark orange", "#FEAF16", "#FBE426",
"Brown")
# Paths
path_to_obj <- paste0(
here::here("scATAC-seq/results/R_objects/level_4/"),
cell_type,
"/03.",
cell_type,
"_annotated_peak_calling_level_4.rds",
sep = ""
)
path_to_save <- paste0(
here::here("scATAC-seq/results/R_objects/level_4/"),
cell_type,
"/04.",
cell_type,
"_integration_peak_calling_level_4.rds",
sep = ""
)
seurat <- readRDS(path_to_obj)
seurat
## An object of class Seurat
## 71767 features across 3960 samples within 1 assay
## Active assay: peaks_redefined (71767 features, 0 variable features)
## 1 dimensional reduction calculated: umap
# Normalization, dimensionality reduction
seurat <- seurat %>%
RunTFIDF() %>%
FindTopFeatures(min.cutoff = 10) %>%
RunSVD() %>%
RunUMAP(reduction = "lsi", dims = 2:40)
DepthCor(seurat)
DimPlot(seurat,
cols = color_palette,
group.by = "annotation_paper",
pt.size = 0.1)
# Visualize UMAP's confounders
confounders <- c("library_name", "sex", "age_group", "hospital", "assay")
umaps_before_integration <- purrr::map(confounders, function(x) {
p <- DimPlot(seurat, group.by = x, pt.size = 0.1)
p
})
names(umaps_before_integration) <- confounders
print("UMAP colored by GEM:")
## [1] "UMAP colored by GEM:"
umaps_before_integration$library_name + NoLegend()
print("UMAP colored by sex, age group, cell hashing status, sampling center and assay:")
## [1] "UMAP colored by sex, age group, cell hashing status, sampling center and assay:"
umaps_before_integration[2:length(umaps_before_integration)]
## $sex
##
## $age_group
##
## $hospital
##
## $assay
seurat <- RunHarmony(
object = seurat,
dims = 2:40,
group.by.vars = 'assay',
reduction = 'lsi',
assay.use = 'peaks_redefined',
project.dim = FALSE,
max.iter.harmony = 20
)
# Non-linear dimension reduction and clustering
seurat <- RunUMAP(seurat, dims = 2:24, reduction = 'harmony')
DimPlot(seurat,
cols = color_palette,
group.by = "annotation_paper",
pt.size = 0.8)
# Visualize UMAP's confounders
umaps_after_integration <- purrr::map(confounders, function(x) {
p <- DimPlot(seurat, group.by = x, pt.size = 0.1)
p
})
names(umaps_after_integration) <- confounders
print("UMAP colored by GEM:")
## [1] "UMAP colored by GEM:"
umaps_after_integration$library_name + NoLegend()
print("UMAP colored by sex, age group, cell hashing status, sampling center and assay:")
## [1] "UMAP colored by sex, age group, cell hashing status, sampling center and assay:"
umaps_after_integration[2:length(umaps_before_integration)]
## $sex
##
## $age_group
##
## $hospital
##
## $assay
# Save integrated Seurat object
saveRDS(seurat, path_to_save)
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS/LAPACK: /Users/pauli/opt/anaconda3/envs/Motif_TF/lib/libopenblasp-r0.3.10.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] forcats_0.5.0 stringr_1.4.0 dplyr_1.0.7 purrr_0.3.4 readr_1.4.0 tidyr_1.1.3 tibble_3.1.2 ggplot2_3.3.5 tidyverse_1.3.0 harmony_1.0 Rcpp_1.0.6 Signac_1.2.1 SeuratWrappers_0.3.0 SeuratObject_4.0.2 Seurat_4.0.3 BiocStyle_2.16.1
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.1.10 fastmatch_1.1-0 plyr_1.8.6 igraph_1.2.6 lazyeval_0.2.2 splines_4.0.3 BiocParallel_1.22.0 listenv_0.8.0 SnowballC_0.7.0 scattermore_0.7 GenomeInfoDb_1.24.2 digest_0.6.27 htmltools_0.5.1.1 fansi_0.5.0 magrittr_2.0.1 tensor_1.5 cluster_2.1.0 ROCR_1.0-11 remotes_2.4.1 globals_0.14.0 Biostrings_2.56.0 modelr_0.1.8 matrixStats_0.59.0 docopt_0.7.1 spatstat.sparse_2.0-0 colorspace_2.0-2 rvest_0.3.6 blob_1.2.1 ggrepel_0.9.1 haven_2.3.1 xfun_0.18 sparsesvd_0.2 crayon_1.4.1 RCurl_1.98-1.2 jsonlite_1.7.2 spatstat.data_2.1-0 survival_3.2-7 zoo_1.8-9 glue_1.4.2 polyclip_1.10-0 gtable_0.3.0 zlibbioc_1.34.0 XVector_0.28.0 leiden_0.3.8 future.apply_1.7.0 BiocGenerics_0.34.0 abind_1.4-5 scales_1.1.1 DBI_1.1.0 miniUI_0.1.1.1
## [52] viridisLite_0.4.0 xtable_1.8-4 reticulate_1.20 spatstat.core_2.2-0 rsvd_1.0.3 stats4_4.0.3 htmlwidgets_1.5.3 httr_1.4.2 RColorBrewer_1.1-2 ellipsis_0.3.2 ica_1.0-2 pkgconfig_2.0.3 farver_2.1.0 dbplyr_1.4.4 ggseqlogo_0.1 uwot_0.1.10 deldir_0.2-10 here_1.0.1 utf8_1.2.1 labeling_0.4.2 tidyselect_1.1.1 rlang_0.4.11 reshape2_1.4.4 later_1.2.0 cellranger_1.1.0 munsell_0.5.0 tools_4.0.3 cli_3.0.0 generics_0.1.0 broom_0.7.2 ggridges_0.5.3 evaluate_0.14 fastmap_1.1.0 yaml_2.2.1 goftest_1.2-2 fs_1.5.0 knitr_1.30 fitdistrplus_1.1-5 RANN_2.6.1 pbapply_1.4-3 future_1.21.0 nlme_3.1-150 mime_0.11 slam_0.1-47 RcppRoll_0.3.0 xml2_1.3.2 rstudioapi_0.11 compiler_4.0.3 plotly_4.9.4.1 png_0.1-7 spatstat.utils_2.2-0
## [103] reprex_0.3.0 tweenr_1.0.1 stringi_1.6.2 RSpectra_0.16-0 lattice_0.20-41 Matrix_1.3-4 vctrs_0.3.8 pillar_1.6.1 lifecycle_1.0.0 BiocManager_1.30.10 spatstat.geom_2.2-0 lmtest_0.9-38 RcppAnnoy_0.0.18 data.table_1.14.0 cowplot_1.1.1 bitops_1.0-7 irlba_2.3.3 httpuv_1.6.1 patchwork_1.1.1 GenomicRanges_1.40.0 R6_2.5.0 bookdown_0.21 promises_1.2.0.1 KernSmooth_2.23-17 gridExtra_2.3 lsa_0.73.2 IRanges_2.22.1 parallelly_1.26.1 codetools_0.2-17 MASS_7.3-53 assertthat_0.2.1 rprojroot_2.0.2 withr_2.4.2 qlcMatrix_0.9.7 sctransform_0.3.2 Rsamtools_2.4.0 S4Vectors_0.26.0 GenomeInfoDbData_1.2.3 hms_0.5.3 mgcv_1.8-33 parallel_4.0.3 grid_4.0.3 rpart_4.1-15 rmarkdown_2.5 Rtsne_0.15 ggforce_0.3.2 lubridate_1.7.9 shiny_1.6.0